# Functions
# Plot color palette
plot_color_palette <- function(input_cols) {
col_data <- tibble(color = input_cols) %>%
mutate(color = fct_inorder(color))
res <- col_data %>%
ggplot(aes(x = "color", fill = color)) +
geom_bar() +
scale_fill_manual(values = input_cols) +
theme_void()
res
}
# Filter list of Seurat objects for patient, normalize and merge objects
merge_sobj <- function(sobj_list, sample_order = NULL) {
res <- merge(
x = sobj_list[[1]],
y = sobj_list[2:length(sobj_list)],
add.cell.ids = names(sobj_list)
) %>%
ScaleData(assay = "RNA") %>%
ScaleData(assay = "adt") %>%
FindVariableFeatures(assay = "RNA")
# Set sample order
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_ids") %>%
mutate(orig.ident = fct_relevel(orig.ident, sample_order)) %>%
column_to_rownames("cell_ids")
res
}
# Run PCA, cluster, and run UMAP using gene expression data
cluster_RNA <- function(sobj_in, assay = "RNA", resolution = 0.6,
dims = 1:40, prefix = "", ...) {
# Use FindNeighbors to construct a K-nearest neighbors graph based on the euclidean distance in
# PCA space, and refine the edge weights between any two cells based on the
# shared overlap in their local neighborhoods (Jaccard similarity).
# Use FindClusters to apply modularity optimization techniques such as the Louvain algorithm
# (default) or SLM, to iteratively group cells together
res <- sobj_in
# ScaleData
if (!str_c("ScaleData.", assay) %in% names(res@commands)) {
res <- res %>%
ScaleData(assay = assay)
}
# Perform PCA
# By default only variable features are used for PCA
res <- res %>%
RunPCA(assay = assay, ...) %>%
AddMetaData(
metadata = FetchData(., c("PC_1", "PC_2")),
col.name = str_c(prefix, c("PC_1", "PC_2"))
)
# Create nearest neighbors graph and find clusters
res <- res %>%
FindNeighbors(
assay = assay,
reduction = "pca",
dims = dims
) %>%
FindClusters(
resolution = resolution,
verbose = F
) %>%
AddMetaData(
metadata = Idents(.),
col.name = str_c(assay, "_clusters")
)
# Run UMAP, UMAP coordinates will get added to the meta.data by clustifyr
res <- res %>%
RunUMAP(
assay = assay,
dims = dims,
reduction.name = str_c(prefix, "umap"),
reduction.key = str_c(prefix, "UMAP_")
)
res
}
# Fit gaussian mixture model for given signal
fit_GMM <- function(sobj_in, data_column = "adt_ovalbumin") {
# Fit GMM for OVA signal
data_df <- sobj_in %>%
FetchData(data_column)
mixmdl <- data_df %>%
pull(data_column) %>%
normalmixEM()
# New column names
ova_names <- c("low", "high")
comp_names <- c("comp.1", "comp.2")
if (mixmdl$mu[1] > mixmdl$mu[2]) {
ova_names <- rev(ova_names)
}
names(comp_names) <- ova_names
names(mixmdl$mu) <- ova_names
names(mixmdl$sigma) <- ova_names
names(mixmdl$lambda) <- ova_names
# Divide into OVA groups
res <- data.frame(
cell_id = rownames(data_df),
data = data_df[, data_column],
mixmdl$posterior
) %>%
dplyr::rename(!!sym(data_column) := data) %>%
rename(all_of(comp_names)) %>%
mutate(GMM_grp = if_else(low > 0.5, "low", "high")) %>%
column_to_rownames("cell_id")
res <- list(
res = res,
mu = mixmdl$mu,
sigma = mixmdl$sigma,
lambda = mixmdl$lambda
)
res
}
# Add distribution of GMM component to plot
add_stat_fun <- function(gmm_in, cols_in, key) {
# dnorm provides density for the normal distribution given the mean and
# standard deviation. Lambda is used to adjust for the mixture composition.
# mu: Mean of component
# sig: Standard deviation of component
# lam: Lambda of component (mixture weight)
plot_mix_comps <- function(x, mu, sigma, lam) {
lam * dnorm(x, mu, sigma)
}
stat_function(
geom = "line",
fun = plot_mix_comps,
args = list(gmm_in$mu[key], gmm_in$sigma[key], gmm_in$lambda[key]),
color = cols_in[key],
lwd = 1
)
}
# Overlay feature data on UMAP or tSNE
# Cannot change number of columns when using FeaturePlot with split.by
plot_features <- function(sobj_in, x = "UMAP_1", y = "UMAP_2", feature, data_slot = "data",
split_id = NULL, pt_size = 0.25, pt_outline = NULL, plot_cols = c("#fafafa", "#e31a1c"),
feat_levels = NULL, split_levels = NULL, min_pct = NULL, max_pct = NULL,
calc_cor = F, lm_line = F, lab_size = 3.7, lab_pos = c(0.8, 0.9), show_title = F,
...) {
# Format imput data
counts <- sobj_in
if ("Seurat" %in% class(sobj_in)) {
vars <- c(x, y, feature)
if (!is.null(split_id)) {
vars <- c(vars, split_id)
}
counts <- sobj_in %>%
FetchData(vars = unique(vars), slot = data_slot) %>%
as_tibble(rownames = "cell_ids")
}
# Rename features
if (!is.null(names(feature))) {
counts <- counts %>%
rename(!!!syms(feature))
feature <- names(feature)
}
if (!is.null(names(x))) {
counts <- counts %>%
rename(!!!syms(x))
x <- names(x)
}
if (!is.null(names(y))) {
counts <- counts %>%
rename(!!!syms(y))
y <- names(y)
}
if (show_title) {
counts <- counts %>%
gather(key, value, !!sym(feature))
feature <- "value"
}
# Set min and max values for feature
if (!is.null(min_pct) || !is.null(max_pct)) {
counts <- counts %>%
mutate(
pct_rank = percent_rank(!!sym(feature)),
max_val = ifelse(pct_rank > max_pct, !!sym(feature), NA),
max_val = min(max_val, na.rm = T),
min_val = ifelse(pct_rank < min_pct, !!sym(feature), NA),
min_val = max(min_val, na.rm = T),
!!sym(feature) := if_else(!!sym(feature) > max_val, max_val, !!sym(feature)),
!!sym(feature) := if_else(!!sym(feature) < min_val, min_val, !!sym(feature))
)
}
# Set feature order
if (!is.null(feat_levels)) {
counts <- counts %>%
mutate(!!sym(feature) := fct_relevel(!!sym(feature), feat_levels))
}
# Set facet order
if (!is.null(split_id)) {
counts <- counts %>%
mutate(split_id = !!sym(split_id))
if (!is.null(split_levels)) {
counts <- counts %>%
mutate(split_id = fct_relevel(split_id, split_levels))
}
}
# Calculate correlation
if (calc_cor) {
if (!is.null(split_id)) {
counts <- counts %>%
group_by(split_id)
}
counts <- counts %>%
mutate(
cor_lab = cor(!!sym(x), !!sym(y)),
cor_lab = round(cor_lab, digits = 2),
cor_lab = str_c("r = ", cor_lab),
min_x = min(!!sym(x)),
max_x = max(!!sym(x)),
min_y = min(!!sym(y)),
max_y = max(!!sym(y)),
lab_x = (max_x - min_x) * lab_pos[1] + min_x,
lab_y = (max_y - min_y) * lab_pos[1] + min_y
)
}
# Create scatter plot
res <- counts %>%
arrange(!!sym(feature)) %>%
ggplot(aes(!!sym(x), !!sym(y), color = !!sym(feature)))
if (!is.null(pt_outline)) {
res <- res +
geom_point(aes(fill = !!sym(feature)), size = pt_outline, color = "black", show.legend = F)
}
res <- res +
geom_point(size = pt_size)
# Add regression line
if (lm_line) {
res <- res +
geom_smooth(method = "lm", se = F, color = "black", size = 0.5, linetype = 2)
}
# Add correlation coefficient label
if (calc_cor) {
res <- res +
geom_text(
aes(x = lab_x, lab_y, label = cor_lab),
color = "black",
size = lab_size,
check_overlap = T,
show.legend = F
)
}
# Show facet-style title
if (show_title) {
res <- res +
facet_wrap(~ key, scales = "free") +
theme(legend.title = element_blank())
}
# Set feature colors
if (is.numeric(counts[[feature]])) {
res <- res +
scale_color_gradient(low = plot_cols[1], high = plot_cols[2])
} else {
res <- res +
scale_color_manual(values = plot_cols)
}
# Split plot into facets
if (!is.null(split_id)) {
res <- res +
facet_wrap(~ split_id, ...)
}
res
}
# Run gprofiler
run_gprofiler <- function(gene_list, genome = NULL, gmt_id = NULL,
dbases = c("GO:BP", "GO:MF", "KEGG"), ...) {
# Check for empty gene list
if (is_empty(gene_list)) {
return(NULL)
}
# Check arguments
if (is.null(genome) && is.null(gmt_id)) {
stop("ERROR: Must specifiy genome or gmt_id")
}
# Retrieve organism name for gProfileR
if (!is.null(genome)){
genomes <- list(
GRCm = "mmusculus",
GRCh = "hsapiens",
BDGP = "dmelanogaster"
)
org <- genome %>%
str_remove("[0-9]+$") %>%
genomes[[.]]
}
if (!is.null(gmt_id)) {
org <- gmt_id
dbases <- NULL
}
# Run gProfileR
res <- gene_list %>%
gost(
organism = org,
sources = dbases,
domain_scope = "annotated",
significant = T,
...
)
# Format and sort output data.frame
res <- res$result %>%
as_tibble()
if (!is_empty(res)) {
res <- res %>%
arrange(source, p_value)
}
res
}
# Create GO bubble plot
create_bubbles <- function(GO_df, plot_colors = theme_cols[c(1:2, 4, 9)],
n_terms = 15) {
# Check for empty inputs
if (is_empty(GO_df) || nrow(GO_df) == 0) {
res <- ggplot() +
geom_blank()
return(res)
}
# Shorten GO terms and database names
GO_data <- GO_df %>%
mutate(
term_id = str_remove(term_id, "(GO|KEGG):"),
term_id = str_c(term_id, " ", term_name),
term_id = str_to_lower(term_id),
term_id = str_trunc(term_id, 40, "right"),
source = fct_recode(
source,
"Biological\nProcess" = "GO:BP",
"Cellular\nComponent" = "GO:CC",
"Molecular\nFunction" = "GO:MF",
"KEGG" = "KEGG"
)
)
# Reorder database names
plot_levels <- c(
"Biological\nProcess",
"Cellular\nComponent",
"Molecular\nFunction",
"KEGG"
)
GO_data <- GO_data %>%
mutate(source = fct_relevel(source, plot_levels))
# Extract top terms for each database
top_GO <- GO_data %>%
group_by(source) %>%
arrange(p_value) %>%
dplyr::slice(1:n_terms) %>%
ungroup()
# Create bubble plots
res <- GO_data %>%
ggplot(aes(1.25, -log10(p_value), size = intersection_size)) +
geom_point(color = plot_colors, alpha = 0.5, show.legend = T) +
geom_text_repel(
aes(2, -log10(p_value), label = term_id),
data = top_GO,
size = 2.3,
direction = "y",
hjust = 0,
segment.size = NA
) +
xlim(1, 8) +
labs(y = "-log10(p-value)") +
theme_info +
theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
) +
facet_wrap(~ source, scales = "free", nrow = 1)
res
}
# Plot percentage of cells in given groups
plot_cell_count <- function(sobj_in, group_id, split_id = NULL, group_order = NULL,
fill_id, plot_colors = theme_cols,
x_lab = "Cell type", y_lab = "Fraction of cells",
bar_pos = "fill", order_count = T, bar_line = 0, ...) {
res <- sobj_in@meta.data %>%
rownames_to_column("cell_ids") %>%
mutate(
group_id := !!sym(group_id),
fill_id := !!sym(fill_id)
)
if (!is.null(group_order)) {
res <- res %>%
mutate(group_id = fct_relevel(group_id, group_order))
}
if (!is.null(split_id)) {
res <- res %>%
mutate(split_id := !!sym(split_id))
}
if (order_count) {
res <- res %>%
mutate(fill_id = fct_reorder(fill_id, cell_ids, n_distinct))
}
res <- res %>%
ggplot(aes(group_id, fill = fill_id)) +
geom_bar(position = bar_pos, size = bar_line, color = "black") +
scale_fill_manual(values = plot_colors) +
labs(x = x_lab, y = y_lab) +
theme_info
if (!is.null(split_id)) {
res <- res +
facet_wrap(~ split_id, ...)
}
res
}
# Run FindAllMarkers
find_markers <- function(input_sobj, only_pos = T, p_cutoff = 0.05, ...) {
res <- input_sobj %>%
FindAllMarkers(only.pos = only_pos, ...) %>%
as_tibble() %>%
filter(p_val_adj < p_cutoff)
res
}
# Find cluster markers for each separate cell type
find_group_markers <- function(input_grp, input_sobj, grp_column, clust_column) {
res <- input_sobj %>%
subset(!!sym(grp_column) == input_grp)
clusts <- res@meta.data[, clust_column]
if (n_distinct(clusts) < 2) {
return(NULL)
}
Idents(res) <- res %>%
FetchData(clust_column)
res <- res %>%
find_markers() %>%
mutate(cell_type = input_grp)
res
}
# Create reference UMAP for comparisons
create_ref_umap <- function(input_sobj, pt_mtplyr = 1, color_guide, ...) {
res <- input_sobj %>%
plot_features(
pt_size = 0.1 * pt_mtplyr,
pt_outline = 0.4,
...
) +
guides(color = color_guide) +
blank_theme +
theme(
legend.position = "top",
legend.title = element_blank(),
legend.text = element_text(size = 10)
)
res
}
# Create UMAPs showing marker gene signal
create_marker_umaps <- function(input_sobj, input_markers, umap_col, add_outline = NULL, pt_mtplyr = 1) {
pt_size <- 0.25 * pt_mtplyr
res <- input_markers %>%
map(~ {
input_sobj %>%
plot_features(
feature = .x,
plot_cols = c("#fafafa", umap_col),
pt_outline = add_outline,
pt_size = pt_size
) +
ggtitle(.x) +
blank_theme +
theme(
plot.title = element_text(size = 13),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 8),
legend.key.height = unit(0.1, "cm"),
legend.key.width = unit(0.3, "cm"),
axis.title.y = element_text(size = 13, color = "white"),
axis.text.y = element_text(size = 8, color = "white")
)
})
res
}
# Create boxplots showing marker gene signal
create_marker_boxes <- function(input_sobj, input_markers, clust_column, box_cols,
group = NULL, include_legend = F, all_boxes = F,
all_violins = F, order_boxes = T, n_boxes = 10,
n_rows = 2, pt_mtplyr = 1, ...) {
# Retrieve and format data for boxplots
box_data <- input_sobj %>%
FetchData(c(clust_column, input_markers)) %>%
as_tibble(rownames = "cell_id") %>%
mutate(grp = str_remove(!!sym(clust_column), "^[a-zA-Z0-9_]+-"))
input_markers <- input_markers %>%
str_trunc(9)
# Filter based on input group
if (!is.null(group)) {
box_data <- box_data %>%
filter(grp == group)
}
# Format data for plots
box_data <- box_data %>%
pivot_longer(cols = c(-cell_id, -grp, -!!sym(clust_column)), names_to = "key", values_to = "Counts") %>%
mutate(
!!sym(clust_column) := fct_relevel(!!sym(clust_column), names(box_cols)),
key = str_trunc(key, width = 9, side = "right"),
key = fct_relevel(key, input_markers)
)
# Order boxes by mean signal
if (order_boxes) {
box_data <- box_data %>%
mutate(!!sym(clust_column) := fct_reorder(!!sym(clust_column), Counts, mean, .desc = T))
}
n_clust <- box_data %>%
pull(clust_column) %>%
n_distinct()
# Create plots
n_cols <- ceiling(n_boxes / n_rows)
res <- box_data %>%
ggplot(aes(!!sym(clust_column), Counts, color = !!sym(clust_column))) +
facet_wrap(~ key, ncol = n_cols) +
scale_color_manual(values = box_cols) +
theme_info +
theme(
panel.spacing.x = unit(0.7, "cm"),
strip.background = element_blank(),
strip.text = element_text(size = 13),
legend.position = "none",
axis.title.x = element_blank(),
axis.title.y = element_text(size = 13),
axis.text.x = element_blank(),
axis.text.y = element_text(size = 8),
axis.ticks.x = element_blank(),
axis.line.x = element_blank()
)
# Adjust output plot type
if (n_clust > 6 || all_boxes) {
res <- res +
geom_boxplot(aes(fill = !!sym(clust_column), color = !!sym(clust_column)), size = 0, outlier.color = "#f0f0f0", outlier.size = 0.25) +
stat_summary(fun = "median", geom = "point", shape = 22, size = 1, fill = "white") +
scale_fill_manual(values = box_cols) +
theme(...)
} else if (all_violins) {
res <- res +
geom_violin(aes(fill = !!sym(clust_column)), size = 0.2) +
stat_summary(fun = "median", geom = "point", shape = 22, size = 1, fill = "white") +
scale_fill_manual(values = box_cols) +
scale_color_manual(values = box_cols) +
theme(...)
} else {
pt_size <- 0.3 * pt_mtplyr
res <- res +
geom_quasirandom(size = pt_size) +
theme(...)
}
# Add legend
if (include_legend) {
res <- res +
guides(color = col_guide) +
theme(legend.position = "top")
}
# Add blank space for missing facets
n_keys <- n_distinct(box_data$key)
if (n_keys <= n_cols && n_rows > 1) {
n_keys <- if_else(n_keys == 1, 2, as.double(n_keys))
n_cols <- floor(n_cols / n_keys)
res <- res %>%
plot_grid(
ncol = n_cols,
nrow = 2
)
}
res
}
# Create figure summarizing marker genes
create_marker_fig <- function(input_sobj, input_markers, input_GO, clust_column,
input_umap, umap_color, fig_heights = c(0.46, 0.3, 0.3),
GO_genome = params$genome, box_colors, n_boxes = 10,
umap_outline = NULL, umap_mtplyr = 1, xlsx_name = NULL,
sheet_name = NULL, ...) {
blank_umap <- ggplot() +
geom_blank() +
theme_void()
marks_umap <- blank_umap
marks_boxes <- blank_umap
GO_bubbles <- blank_umap
# Create UMAPs showing marker gene signal
if (nrow(input_markers) > 0) {
top_marks <- input_markers$gene %>%
head(n_boxes)
clust_legend <- get_legend(input_umap)
input_umap <- input_umap +
theme(legend.position = "none")
marks_umap <- input_sobj %>%
create_marker_umaps(
input_markers = head(top_marks, 7),
umap_col = umap_color,
add_outline = umap_outline,
pt_mtplyr = umap_mtplyr
) %>%
append(list(input_umap), .)
marks_umap <- plot_grid(
plotlist = marks_umap,
ncol = 4,
nrow = 2,
align = "vh",
axis = "trbl"
)
marks_umap <- plot_grid(
clust_legend, marks_umap,
rel_heights = c(0.2, 0.9),
nrow = 2
)
# Create boxplots showing marker gene signal
marks_boxes <- input_sobj %>%
create_marker_boxes(
input_markers = top_marks,
clust_column = clust_column,
box_cols = box_colors,
n_boxes = n_boxes,
plot.margin = unit(c(0.8, 0.2, 0.2, 0.2), "cm"),
...
)
# Create GO term plots
if (nrow(input_GO) > 0) {
GO_bubbles <- input_GO %>%
create_bubbles(plot_colors = umap_color) +
theme(
plot.margin = unit(c(0.8, 0.2, 0.2, 0.2), "cm"),
strip.background = element_blank(),
strip.text = element_text(size = 13),
axis.title.y = element_text(size = 13),
axis.text.y = element_text(size = 8),
axis.line.x = element_blank(),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 8)
)
# Write GO terms to excel file
if (!is.null(xlsx_name)) {
input_GO %>%
dplyr::select(
term_name, term_id,
source, effective_domain_size,
query_size, intersection_size,
p_value, significant
) %>%
arrange(source, p_value) %>%
write.xlsx(
file = str_c(xlsx_name, "_GO.xlsx"),
sheetName = sheet_name,
append = T
)
}
}
# Write markers to excel file
if (!is.null(xlsx_name)) {
input_markers %>%
write.xlsx(
file = str_c(xlsx_name, "_markers.xlsx"),
sheetName = sheet_name,
append = T
)
}
}
# Create final figure
res <- plot_grid(
marks_umap, marks_boxes, GO_bubbles,
rel_heights = fig_heights,
ncol = 1,
align = "v",
axis = "rl"
)
if (nrow(input_markers) < n_boxes) {
res <- plot_grid(
marks_umap, marks_boxes, GO_bubbles,
rel_heights = fig_heights,
ncol = 1
)
}
res
}
# Filter clusters and set cluster order
set_cluster_order <- function(input_cols, input_marks, n_cutoff = 5) {
input_marks <- input_marks %>%
group_by(cluster) %>%
filter(n() >= n_cutoff) %>%
ungroup()
marks <- unique(input_marks$cluster)
res <- names(input_cols)
res <- res[res %in% marks]
res
}
# Create v1 panel for marker genes
create_marker_panel_v1 <- function(input_sobj, input_cols, input_umap = NULL, clust_column, order_boxes = T,
color_guide = guide_legend(override.aes = list(size = 3.5, shape = 16)),
uniq_GO = F, umap_mtplyr = 6, xlsx_name = NULL, ...) {
# Set point size
umap_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr, 1)
ref_mtplyr <- if_else(umap_mtplyr == 1, umap_mtplyr, umap_mtplyr * 2.5)
# Find marker genes
Idents(input_sobj) <- input_sobj %>%
FetchData(clust_column)
markers <- find_markers(input_sobj)
# Find GO terms
GO_df <- markers %>%
group_by(cluster) %>%
do({
arrange(., p_val_adj) %>%
pull(gene) %>%
run_gprofiler(
genome = params$genome,
ordered_query = T
)
}) %>%
ungroup()
if (uniq_GO && nrow(GO_df) > 0) {
GO_df <- GO_df %>%
group_by(term_id) %>%
filter(n() == 1) %>%
ungroup()
}
# Set cluster order based on order of input_cols
fig_clusters <- input_cols %>%
set_cluster_order(markers)
# Create figures
for (i in seq_along(fig_clusters)) {
cat("\n#### ", fig_clusters[i], "\n", sep = "")
# Filter markers and GO terms
clust <- fig_clusters[i]
fig_marks <- markers %>%
filter(cluster == clust)
fig_GO <- GO_df %>%
filter(cluster == clust)
# Create reference umap
ref_umap <- input_umap
umap_col <- input_cols[clust]
if (is.null(input_umap)) {
umap_levels <- input_cols[names(input_cols) != clust]
umap_levels <- names(c(umap_levels, umap_col))
ref_umap <- input_sobj %>%
create_ref_umap(
feature = clust_column,
plot_cols = input_cols,
feat_levels = umap_levels,
pt_mtplyr = ref_mtplyr,
color_guide = color_guide
)
}
# Create panel
marker_fig <- input_sobj %>%
create_marker_fig(
input_markers = fig_marks,
input_GO = fig_GO,
clust_column = clust_column,
input_umap = ref_umap,
umap_color = umap_col,
box_colors = input_cols,
order_boxes = order_boxes,
umap_mtplyr = umap_mtplyr,
xlsx_name = xlsx_name,
sheet_name = clust,
...
)
cat(nrow(fig_marks), "marker genes were identified,", nrow(fig_GO), "GO terms were identified.")
print(marker_fig)
cat("\n\n---\n\n<br>\n\n<br>\n\n")
}
}
# Create v2 panel that splits plots into groups
create_marker_panel_v2 <- function(input_sobj, input_markers, input_cols, grp_column, clust_column,
color_guide = guide_legend(override.aes = list(size = 3.5, shape = 16)),
uniq_GO = F, umap_mtplyr = 6, xlsx_name = NULL, ...) {
# Set point size
umap_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr, 1)
ref_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr * 2.5, 1)
# Figure colors and order
fig_clusters <- input_cols %>%
set_cluster_order(input_markers)
# Find GO terms
GO_df <- input_markers %>%
group_by(cluster) %>%
do({
arrange(., p_val_adj) %>%
pull(gene) %>%
run_gprofiler(
genome = params$genome,
ordered_query = T
)
}) %>%
ungroup()
if (uniq_GO && nrow(GO_df) > 0) {
GO_df <- GO_df %>%
group_by(term_id) %>%
filter(n() == 1) %>%
ungroup()
}
# Create figures
for (i in seq_along(fig_clusters)) {
cat("\n#### ", fig_clusters[i], "\n", sep = "")
# Filter markers and GO terms
clust <- fig_clusters[i]
fig_marks <- input_markers %>%
filter(cluster == clust)
fig_GO <- GO_df %>%
filter(cluster == clust)
# Set colors
umap_col <- input_cols[clust]
group <- clust %>%
str_remove("^[a-zA-Z0-9_]+-")
grp_regex <- str_c("-", group, "$") %>%
str_replace("\\+", "\\\\+") # include this to escape "+" in names
fig_cols <- input_cols[grepl(grp_regex, names(input_cols))]
fig_cols <- c( "Other" = "#fafafa", fig_cols)
# Create reference UMAP
ref_umap <- input_sobj %>%
FetchData(c("UMAP_1", "UMAP_2", grp_column, clust_column)) %>%
as_tibble(rownames = "cell_id") %>%
mutate(!!sym(clust_column) := if_else(
!!sym(grp_column) != group,
"Other",
!!sym(clust_column)
)) %>%
create_ref_umap(
feature = clust_column,
plot_cols = fig_cols,
feat_levels = names(fig_cols),
pt_mtplyr = ref_mtplyr,
color_guide = color_guide
)
# Create panel
marker_fig <- input_sobj %>%
create_marker_fig(
input_markers = fig_marks,
input_GO = fig_GO,
clust_column = clust_column,
input_umap = ref_umap,
umap_color = umap_col,
box_colors = fig_cols,
group = group,
umap_mtplyr = umap_mtplyr,
xlsx_name = xlsx_name,
sheet_name = clust,
...
)
cat(nrow(fig_marks), "marker genes were identified.", nrow(fig_GO), "GO terms were identified.")
print(marker_fig)
cat("\n\n---\n\n<br>\n\n<br>\n\n")
}
}
# Create panels for manuscript
create_paper_figures <- function(input_sobj, input_cols, summary_fig = NULL, input_umap = NULL, clust_column,
color_guide = guide_legend(override.aes = list(size = 3.5, shape = 16)),
order_boxes = T, uniq_GO = F, umap_mtplyr = 6, xlsx_name = NULL, ...) {
# Set point size
umap_mtplyr <- if_else(ncol(input_sobj) < 500, umap_mtplyr, 1)
ref_mtplyr <- if_else(umap_mtplyr == 1, umap_mtplyr, umap_mtplyr * 2.5)
# Find marker genes
Idents(input_sobj) <- input_sobj %>%
FetchData(clust_column)
markers <- find_markers(input_sobj)
# Find GO terms
GO_df <- markers %>%
group_by(cluster) %>%
do({
arrange(., p_val_adj) %>%
pull(gene) %>%
run_gprofiler(
genome = params$genome,
ordered_query = T
)
}) %>%
ungroup()
if (uniq_GO && nrow(GO_df) > 0) {
GO_df <- GO_df %>%
group_by(term_id) %>%
filter(n() == 1) %>%
ungroup()
}
# Set cluster order based on order of input_cols
fig_clusters <- input_cols %>%
set_cluster_order(markers)
# Create figures
for (i in seq_along(fig_clusters)) {
cat("\n#### ", fig_clusters[i], "\n", sep = "")
# Filter markers and GO terms
clust <- fig_clusters[i]
fig_marks <- markers %>%
filter(cluster == clust)
fig_GO <- GO_df %>%
filter(cluster == clust)
# Create reference umap
ref_umap <- input_umap
umap_col <- input_cols[clust]
if (is.null(input_umap)) {
umap_levels <- input_cols[names(input_cols) != clust]
umap_levels <- names(c(umap_levels, umap_col))
ref_umap <- input_sobj %>%
create_ref_umap(
feature = clust_column,
plot_cols = input_cols,
feat_levels = umap_levels,
pt_mtplyr = ref_mtplyr,
color_guide = color_guide
)
}
# Create panel
marker_fig <- input_sobj %>%
create_marker_fig(
input_markers = fig_marks,
input_GO = fig_GO,
clust_column = clust_column,
input_umap = ref_umap,
umap_color = umap_col,
box_colors = input_cols,
order_boxes = order_boxes,
umap_mtplyr = umap_mtplyr,
xlsx_name = xlsx_name,
sheet_name = clust,
...
)
if (!is.null(summary_fig)) {
marker_fig <- plot_grid(
summary_fig, marker_fig,
rel_heights = c(0.3, 0.7),
ncol = 1,
align = "vh",
axis = "trbl"
)
}
print(marker_fig)
cat("\n\n---\n\n<br>\n\n<br>\n\n")
}
}
# Default chunk options
knitr::opts_chunk$set(message = F, warning = F)
# Load packages
R_packages <- c(
"tidyverse", "Seurat",
"gprofiler2", "knitr",
"cowplot", "ggbeeswarm",
"ggrepel", "RColorBrewer",
"xlsx", "colorblindr",
"ggforce", "broom",
"mixtools", "clustifyr",
"boot", "scales",
"patchwork", "ComplexHeatmap",
"devtools", "broom"
)
for (package in R_packages) {
library(package, character.only = T)
}
# ggplot2 themes
theme_info <- theme_cowplot() +
theme(
plot.title = element_text(face = "plain", size = 20),
strip.background = element_blank(),
strip.text = element_text(face = "plain")
)
umap_theme <- theme_info +
theme(
axis.text = element_blank(),
axis.ticks = element_blank()
)
blank_theme <- umap_theme +
theme(
axis.line = element_blank(),
axis.title = element_blank()
)
# Legend guides
col_guide <- guide_legend(override.aes = list(size = 3.5, shape = 16))
outline_guide <- guide_legend(override.aes = list(
size = 3.5,
shape = 21,
color = "black",
stroke = 0.25
))
# Base color palettes
base_cols <- c(
"#225ea8", # blue
"#e31a1c", # red
"#238443", # green
"#ec7014", # orange
"#6a51a3", # purple
"#c51b7d", # pink
"#8c510a", # brown
"#217D87", # teal, darken("#41b6c4", 0.3)
"#F0E442", # yellow, palette_OkabeIto[4]
"#000000" # black
)
base_cols_paired <- base_cols %>%
map(~ {
.x %>%
lighten(0.25) %>%
desaturate(0.2) %>%
c(.x)
})
names(base_cols_paired) <- base_cols
base_cols <- base_cols %>%
lighten(0.25) %>%
desaturate(0.2) %>%
c(base_cols, .)
# Okabe Ito color palettes
ito_cols <- c(
palette_OkabeIto[1:4], "#d7301f",
palette_OkabeIto[5:6], "#6a51a3",
palette_OkabeIto[7:8]
)
ito_cols_paired <- ito_cols %>%
map(~ c(.x, darken(.x, 0.3)))
names(ito_cols_paired) <- ito_cols
ito_cols <- ito_cols %>%
darken(0.4) %>%
c(ito_cols, ., "#000000")
# Set color palette
theme_cols <- base_cols
paired_cols <- base_cols_paired
theme_cols <- ito_cols
paired_cols <- ito_cols_paired# Add one to variable
plus_one <- function(x, n = 1) {
cmd <- str_c(x, " <<- ", x, " + ", n)
eval(parse(text = cmd))
eval(parse(text = x))
}
# Set colors
set_cols <- function(types_in, cols_in, other_cols) {
types_in <- types_in[!types_in %in% names(other_cols)]
cols_in <- cols_in[!cols_in %in% other_cols]
names(cols_in) <- types_in
cols_in <- cols_in[!is.na(names(cols_in))]
res <- c(cols_in, other_cols)
res
}
# Set subtype colors
set_type_cols <- function(sobjs_in, type_column = "cell_type2", cols_in, other_cols) {
res <- sobjs_in %>%
map(~ {
.x@meta.data %>%
pull(type_column) %>%
unique()
}) %>%
reduce(c) %>%
unique() %>%
set_cols(
cols_in = cols_in,
other_cols = other_cols
)
res
}
# Capitalize first character without modifying other characters
str_to_title_v2 <- function(string) {
cap_first_chr <- function(string) {
chrs <- string %>%
str_split(pattern = "") %>%
unlist()
if (any(chrs %in% LETTERS)) {
return(string)
}
chrs[1] <- str_to_upper(chrs[1])
res <- chrs %>%
reduce(str_c)
res
}
res <- string %>%
map_chr(~ {
.x %>%
str_split(pattern = " ") %>%
unlist() %>%
map_chr(cap_first_chr) %>%
reduce(str_c, sep = " ")
})
res
}
# Subset Seurat objects for plotting
subset_sobj <- function(sobj_in, type, type_column = "cell_type1", subtype_column = "cell_type2",
min_cells = 15, include_types = c("B cell", "T cell", "epithelial"),
orig_idents = c("_1" = "CD45_neg", "_2" = "CD45_pos"),
type_filt = c("DC" = "CD45_pos", "LEC" = "CD45_neg", "fibroblast" = "CD45_neg"),
cap_names = T, ...) {
res <- sobj_in
# Set orig.ident based on cell_id
if (!is.null(orig_idents)) {
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(
orig.ident = str_extract(cell_id, "_[0-9]+$"),
orig.ident = orig_idents[orig.ident]
) %>%
column_to_rownames("cell_id")
}
# Filter cells based on orig.ident
if (!is.null(type_filt)) {
res <- res %>%
subset(subset = orig.ident == type_filt[type])
}
# Filter cells based on input cell type
res <- res %>%
subset(subset = !!sym(type_column) %in% c(type, include_types))
if (!type %in% pull(res@meta.data, type_column)) {
stop(str_c("ERROR: Cell type not present after filtering for ", CD45_status))
}
# Filter cells based on number of cells per subtype
if (!is.null(min_cells)) {
keep_cells <- res@meta.data %>%
rownames_to_column("cell_id") %>%
group_by(!!sym(subtype_column)) %>%
filter(n_distinct(cell_id) > min_cells) %>%
pull(cell_id)
res <- res %>%
subset(cells = keep_cells)
}
# Capitalize subtypes
if (cap_names) {
res@meta.data <- res@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(!!sym(subtype_column) := str_to_title_v2(!!sym(subtype_column))) %>%
column_to_rownames("cell_id")
}
# Re-run UMAP
res <- res %>%
cluster_RNA(...)
res
}
# Add arrows to axis
add_arrow_axis <- function(gg_in, fract = 0.1, ...) {
get_line_coords <- function(range_in, fract) {
mn <- range_in[1]
mx <- range_in[2]
dif <- mx - mn
res <- c(mn - (dif * 0.05), mn + (dif * fract))
res
}
x_coords <- ggplot_build(gg_in)$layout$panel_scales_x[[1]]$range$range %>%
get_line_coords(fract = fract)
y_coords <- ggplot_build(gg_in)$layout$panel_scales_y[[1]]$range$range %>%
get_line_coords(fract = fract)
res <- gg_in +
geom_segment(
x = x_coords[1],
xend = x_coords[2],
y = y_coords[1],
yend = y_coords[1],
size = 0.25,
color = "black",
linejoin = "bevel",
arrow = arrow(ends = "last", type = "open", length = unit(0.02, "npc")),
...
) +
geom_segment(
y = y_coords[1],
yend = y_coords[2],
x = x_coords[1],
xend = x_coords[1],
size = 0.25,
color = "black",
linejoin = "bevel",
arrow = arrow(ends = "last", type = "open", length = unit(0.02, "npc")),
...
) +
theme(axis.title = element_text(hjust = 0, size = 10))
res
}
# Set equal x-axis scales
equalize_x <- function(gg_list_in, log_tran = T, ...) {
set_lims <- function(gg_in, min_x, max_x, log_tran, ...) {
res <- gg_in +
coord_cartesian(xlim = c(min_x, max_x))
if (log_tran) {
res <- res +
scale_x_log10(labels = trans_format("log10", math_format(10^.x)), ...)
}
res
}
gg_ranges <- gg_list_in %>%
map(~ ggplot_build(.x)$layout$panel_scales_x[[1]]$range$range)
min_val <- gg_ranges %>%
map_dbl(~ .x[1]) %>%
min()
max_val <- gg_ranges %>%
map_dbl(~ .x[2]) %>%
max()
res <- gg_list_in %>%
map(
set_lims,
min_x = min_val,
max_x = max_val,
log_tran = log_tran,
...
)
res
}
# Function to create figure panel
create_fig3 <- function(sobj_in, cols_in, subtype_column = "cell_type2", pseudo = 0.03, data_slot = "counts",
box_counts = c("Fold-change relative to T/B cell abundance" = "ova_fc"),
umap_counts = c("ova counts" = "adt_ovalbumin"), plot_title = NULL, arrow_axis = F,
pt_size = 0.1, pt_outline = 0.4, umap_cell_count = F, box_cell_count = T,
control_types = c("B Cell", "T Cell"), ...) {
box_column <- umap_column <- "cell_type"
box_cols <- umap_cols <- cols_in
# Fetch plotting data
data_df <- sobj_in %>%
FetchData(c(subtype_column, box_counts, umap_counts, "UMAP_1", "UMAP_2"), slot = data_slot) %>%
as_tibble(rownames = "cell_id")
# Add pseudo count
if (0 %in% pull(data_df, box_counts)) {
data_df <- data_df %>%
mutate(
pseudo = ifelse(!!sym(box_counts) > 0, !!sym(box_counts), NA),
pseudo = min(pseudo, na.rm = T) * 0.5,
!!sym(box_counts) := !!sym(box_counts) + pseudo
)
}
if (!is.null(names(box_counts))) {
data_df <- data_df %>%
rename(!!box_counts)
box_counts <- names(box_counts)
}
if (!is.null(names(umap_counts))) {
data_df <- data_df %>%
rename(!!umap_counts)
umap_counts <- names(umap_counts)
}
# Set subtype order
# Move select cell types to front of order
data_df <- data_df %>%
mutate(
cell_type = !!sym(subtype_column),
cell_type = fct_reorder(cell_type, !!sym(box_counts), median)
)
type_order <- levels(data_df$cell_type)
if (!is.null(control_types)) {
control_types <- control_types[control_types %in% type_order]
type_order <- type_order[!type_order %in% control_types]
type_order <- c(control_types, type_order)
}
# Count cells for each subtype
data_df <- data_df %>%
group_by(cell_type) %>%
mutate(cell_count = n_distinct(cell_id)) %>%
ungroup() %>%
mutate(cell_type = fct_relevel(cell_type, type_order)) %>%
arrange(cell_type) %>%
mutate(
cell_count = str_c(cell_type, "\n(n = ", cell_count, ")"),
cell_count = fct_inorder(cell_count)
)
# Set cell type colors
names(type_order) <- levels(data_df$cell_count)
cols_df <- tibble(
cell_type = type_order,
cell_count = names(type_order)
)
cols_df <- cols_df %>%
mutate(color = cols_in[cell_type])
# Subtype UMAP
if (umap_cell_count) {
umap_column <- "cell_count"
umap_cols <- setNames(cols_df$color, cols_df$cell_count)
}
umap <- data_df %>%
plot_features(
feature = umap_column,
pt_size = pt_size,
pt_outline = pt_outline,
plot_cols = umap_cols,
feat_levels = rev(names(umap_cols))
) +
guides(color = guide_legend(override.aes = list(size = 3.5))) +
ggtitle(plot_title) +
blank_theme +
theme(
plot.title = element_text(size = 12),
legend.position = "none"
) +
theme(...)
if (arrow_axis) {
umap <- add_arrow_axis(umap)
}
# OVA UMAP
if (!is.null(umap_counts)) {
ova_umap <- data_df %>%
plot_features(
feature = umap_counts,
plot_cols = c("#fafafa", "#d7301f"),
pt_size = 0.3,
pt_outline = 0.5,
min_pct = 0.01,
max_pct = 0.99
) +
blank_theme +
theme(
plot.title = element_text(size = 10, hjust = 0.5),
legend.position = "right",
legend.key.width = unit(0.15, "cm"),
legend.key.height = unit(0.30, "cm"),
legend.title = element_text(size = 10),
legend.text = element_text(size = 10)
)
if (arrow_axis) {
ova_umap <- add_arrow_axis(ova_umap)
}
}
# OVA boxes
if (box_cell_count) {
box_column <- "cell_count"
box_cols <- setNames(cols_df$color, cols_df$cell_count)
}
boxes <- data_df %>%
ggplot(aes(!!sym(box_counts), !!sym(box_column), fill = !!sym(box_column))) +
geom_violin(size = 0.3, draw_quantiles = c(0.25, 0.75), alpha = 0.75) +
stat_summary(geom = "point", color = "black", fun = median) +
scale_color_manual(values = box_cols) +
scale_fill_manual(values = box_cols) +
theme_minimal_vgrid() +
theme(
legend.position = "none",
axis.title.y = element_blank(),
axis.title = element_text(size = 10),
axis.text = element_text(size = 10),
axis.ticks.x = element_line(size = 0.1),
panel.grid.major.x = element_line(size = 0.1)
)
res <- list(umap, boxes)
if (!is.null(umap_counts)) {
res <- append(res, list(ova_umap))
}
names(res) <- rep(plot_title, length(res))
res
}
# Plot correlation
plot_corr <- function(sobj_in, x, y, feat, data_slot, cols_in, plot_title, ...) {
res <- sobj_in %>%
FetchData(c(x, y, feat), slot = data_slot) %>%
mutate(
!!sym(x) := log10(!!sym(x)),
!!sym(y) := log10(!!sym(y))
) %>%
filter(
!!sym(x) != -Inf,
!!sym(y) != -Inf
) %>%
plot_features(
x = x,
y = y,
feature = feat,
data_slot = "counts",
plot_cols = cols_in,
lab_pos = c(0.9, 1),
lab_size = 5,
calc_cor = T,
lm_line = T,
...
) +
ggtitle(plot_title) +
guides(color = guide_legend(override.aes = list(size = 3.5))) +
theme_info +
theme(legend.title = element_blank())
res
}
# Calculate pairwise p-values for gg objects
calc_p_vals <- function(gg_in, sample_name, data_column, type_column, log_tran = T) {
# Pull data from gg object
gg_data <- gg_in$data
# Log transform
if (log_tran) {
gg_data <- gg_data %>%
mutate(!!sym(data_column) := log10(!!sym(data_column)))
}
# Calculate median
gg_stats <- gg_data %>%
group_by(!!sym(type_column)) %>%
summarize(med = median(!!sym(data_column)))
# Run wilcox test
gg_counts <- gg_data %>%
pull(data_column)
gg_groups <- gg_data %>%
pull(type_column)
res <- gg_counts %>%
pairwise.wilcox.test(
g = gg_groups,
p.adj = "bonf"
) %>%
tidy()
# Add medians to data.frame
res <- gg_stats %>%
rename(med_1 = med) %>%
right_join(res, by = c("cell_type" = "group1"))
res <- gg_stats %>%
rename(med_2 = med) %>%
right_join(res, by = c("cell_type" = "group2"))
# Format final table
res <- res %>%
mutate(Sample = sample_name) %>%
select(
Sample,
`Cell type 1` = str_c(type_column, ".y"),
`Median OVA FC 1 (log10)` = med_1,
`Cell type 2` = type_column,
`Median OVA FC 2 (log10)` = med_2,
p.value
)
res
}
# Load Seurat objects
# Avoid loading/clustering same objects twice
so_info <- append(params$fig3_sobjs, params$corr_sobjs) %>%
unique()
so_types <- map_chr(so_info, ~ .x[2])
names(so_types) <- map_chr(so_info, ~ .x[3])
so_paths <- map_chr(so_info, ~ .x[1])
sobjs <- unique(so_paths) %>%
setNames(., .) %>%
map(~ read_rds(file.path(params$rds_dir, .x)))
# Subset objects based on cell types
# Avoid loading/clustering same objects twice
sobjs <- sobjs[match(so_paths, names(sobjs))]
sobjs_sub <- map2(sobjs, so_types, subset_sobj)
names(sobjs_sub) <- names(so_types)
# Split sobjs for figures
fig3_names <- params$fig3_sobjs %>%
map_chr(~ .x[3])
corr_names <- params$corr_sobjs %>%
map_chr(~ .x[3])
fig3_sobjs <- sobjs_sub[fig3_names]
corr_sobjs <- sobjs_sub[corr_names]
# Color palettes
common_cols <- c(
"Epithelial" = "#6a51a3",
"B Cell" = "#E69F00",
"T Cell" = "#009E73"
)
# DC_cols <- sobjs_sub[so_types == "DC"] %>%
DC_cols <- sobjs_sub[so_types[names(sobjs_sub)] == "DC"] %>%
set_type_cols(
cols_in = ito_cols,
other_cols = common_cols
)
LEC_cols <- sobjs_sub[so_types[names(sobjs_sub)] == "LEC"] %>%
set_type_cols(
cols_in = ito_cols,
other_cols = common_cols
)
FRC_cols <- sobjs_sub[so_types[names(sobjs_sub)] == "fibroblast"] %>%
set_type_cols(
cols_in = ito_cols,
other_cols = common_cols
)
# Parameter lists
fig3_params <- list(
sobj_in = fig3_sobjs,
plot_title = names(fig3_sobjs),
cols_in = list(DC_cols, DC_cols, LEC_cols, FRC_cols)
)
corr_params <- list(
sobj_in = corr_sobjs,
plot_title = names(corr_sobjs),
cols_in = list(DC_cols, DC_cols, LEC_cols, LEC_cols, FRC_cols, FRC_cols)
)
n_plot <- 0# Create panel plots
gg_list <- fig3_params %>%
pmap(
create_fig3,
box_counts = c("Fold-change relative to T/B cell abundance" = "ova_fc"),
umap_counts = c("ova counts" = "adt_ovalbumin"),
data_slot = "counts"
) %>%
flatten()
# Set violin scales equal
violin_idx <- seq_along(gg_list) %% 3 == 2
gg_list[violin_idx] <- gg_list[violin_idx] %>%
equalize_x(log_tran = T)
# Create table of p-values
if (!is.null(params$p_val_xlsx)) {
p_vals <- gg_list[violin_idx] %>%
imap(
calc_p_vals,
data_column = "Fold-change relative to T/B cell abundance",
type_column = "cell_type",
log_tran = T
) %>%
bind_rows()
p_vals %>%
write.xlsx(params$p_val_xlsx)
}
# Create final figure
gg_list %>%
wrap_plots(ncol = 3, widths = c(1, 0.75, 1)) +
plot_annotation(tag_levels = "a") &
theme(
plot.margin = unit(c(1, 0.5, 0.5, 0.5), "cm"),
plot.tag = element_text(size = 24, face = "plain"),
plot.tag.position = c(-0.08, 1)
)Figures for all FRC timepoints
# Figure parameters
FRC_idx <- so_types[names(sobjs_sub)] == "fibroblast"
FRC_sobjs <- sobjs_sub[FRC_idx]
FRC_sobjs <- compact(FRC_sobjs[corr_names]) # set sobj order
FRC_params <- list(
sobj_in = FRC_sobjs,
plot_title = names(FRC_sobjs),
cols_in = list(FRC_cols, FRC_cols)
)
# Create panel plots
gg_list <- FRC_params %>%
pmap(
create_fig3,
box_counts = c("Fold-change relative to T/B cell abundance" = "ova_fc"),
umap_counts = c("ova counts" = "adt_ovalbumin"),
data_slot = "counts"
) %>%
flatten()
# Set violin scales equal
violin_idx <- seq_along(gg_list) %% 3 == 2
gg_list[violin_idx] <- gg_list[violin_idx] %>%
equalize_x(log_tran = T)
# Create final figure
gg_list %>%
wrap_plots(ncol = 3, widths = c(1, 0.75, 1)) +
plot_annotation(tag_levels = "a") &
theme(
plot.margin = unit(c(1, 0.5, 0.5, 0.5), "cm"),
plot.tag = element_text(size = 24, face = "plain"),
plot.tag.position = c(-0.08, 1)
)Comparison of our LEC subtypes and Xiang et al. subtypes
# New names and colors
new_types <- c(
"cLEC" = "Ceiling LECs",
"fLEC" = "Floor LECs"
)
new_cols <- LEC_cols <- c(
LEC_cols,
"Ptx3_LEC" = "#D55E00",
"Unassigned" = "#ffffff"
)
# Assign cell types with new reference
load(file.path(params$ref_dir, "ref_LEC_xiang.rda"))
LEC_idx <- so_types[names(sobjs_sub)] == "LEC"
LEC_sobjs <- sobjs_sub[LEC_idx] %>%
map(
clustify,
ref_mat = ref_LEC_xiang,
cluster_col = "RNA_clusters"
) %>%
map(~ {
.x@meta.data <- .x@meta.data %>%
rownames_to_column("cell_id") %>%
mutate(
new_types = if_else(type %in% names(new_types), new_types[type], type),
new_types = str_to_title_v2(new_types),
old_types = cell_type2,
cell_type2 = new_types
) %>%
column_to_rownames("cell_id")
.x
})
LEC_sobjs <- compact(LEC_sobjs[corr_names])
# Figure parameters
LEC_names <- names(LEC_sobjs)
new_names <- LEC_names %>%
str_replace("\\)", ", Xiang et al.)")
LEC_params <- list(
sobj_in = append(LEC_sobjs[c(1, 1)], LEC_sobjs[c(2, 2)]),
plot_title = c(LEC_names[1], new_names[1], LEC_names[2], new_names[2]),
cols_in = list(new_cols, new_cols, new_cols, new_cols),
subtype_column = rep(c("old_types", "new_types"), 2)
)
# Create panel plots
gg_list <- LEC_params %>%
pmap(
create_fig3,
box_counts = c("Fold-change relative to T/B cell abundance" = "ova_fc"),
umap_counts = c("ova counts" = "adt_ovalbumin"),
data_slot = "counts"
) %>%
flatten()
# Set violin scales equal
violin_idx <- seq_along(gg_list) %% 3 == 2
gg_list[violin_idx] <- gg_list[violin_idx] %>%
equalize_x(log_tran = T)
# Create final figure
gg_list[seq_along(gg_list) %% 3 != 0] %>%
wrap_plots(ncol = 2, widths = c(1, 0.75, 1)) +
plot_annotation(tag_levels = "a") &
theme(
plot.margin = unit(c(1, 0.5, 0.5, 0.5), "cm"),
plot.tag = element_text(size = 24, face = "plain"),
plot.tag.position = c(-0.05, 1)
)Correlation between average gene expression for our LEC subtypes and Xiang et al. subtypes
# Function to calculate correlation coefficients
calc_type_cor <- function(sobj_in, ..., assay = "RNA") {
calc_type_expr <- function(sobj_in, column, prefix, assay) {
cell_types <- sobj_in@meta.data %>%
pull(!!sym(column)) %>%
unique()
names(cell_types) <- str_c(prefix, cell_types)
res <- cell_types %>%
map(~ {
sobj_in %>%
subset(!!sym(column) == .x) %>%
GetAssayData(assay = assay, slot = "data") %>%
as.matrix() %>%
rowMeans()
})
res
}
columns <- list(...)
expr_vecs <- columns %>%
imap(~ calc_type_expr(
sobj_in = sobj_in,
column = .x,
prefix = str_c(.y, "_"),
assay = assay
))
res <- expr_vecs %>%
flatten() %>%
do.call(cbind, .) %>%
cor() %>%
round(2)
res
}
# Create correlation matrices
cor_mats <- LEC_sobjs %>%
map(calc_type_cor, "old_types", "new_types") %>%
map(~ .x[grepl("^2_", rownames(.x)), grepl("^1_", colnames(.x))])
# Create heatmaps
cor_heatmaps <- cor_mats %>%
imap(~ Heatmap(
matrix = .x,
col = c("#0072B2", "#ffffff", "#d7301f"),
name = "",
column_title = .y
))
cor_heatmaps[[1]]Correlation between RNA counts and ova counts grouped by cell type
corr_params %>%
pmap(
plot_corr,
x = c("RNA counts (log10)" = "nCount_RNA"),
y = c("ova counts (log10)" = "adt_ovalbumin"),
feat = "cell_type2",
data_slot = "counts",
pt_size = 0.5
) %>%
plot_grid(
plotlist = .,
ncol = 2,
align = "vh",
axis = "trbl"
)Correlation between RNA counts and ova counts grouped by cell type and subtype
pad <- c(2, 2, 9.5, 2, 9.5, 2)
corr_params %>%
pmap(
plot_corr,
x = c("RNA counts (log10)" = "nCount_RNA"),
y = c("ova counts (log10)" = "adt_ovalbumin"),
feat = "cell_type2",
split_id = "cell_type2",
data_slot = "counts",
pt_size = 1,
scales = "free",
ncol = 3
) %>%
map2(pad, ~ {
.x + theme(
legend.position = "none",
strip.text = element_text(size = 14),
plot.margin = unit(c(0.2, 0.2, .y, 0.2), "cm")
)
}) %>%
plot_grid(
plotlist = .,
rel_heights = c(1, 1.3, 1),
align = "v",
ncol = 2
)